1 Introduction

Here, we will cluster the cells of the patient with id “365”.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)

2.2 Define parameters

# Paths
path_to_obj <- here::here("results/R_objects/4.seurat_leukemic.rds")
path_to_save <- here::here("results/R_objects/5.seurat_clustered_365.rds")


# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "mediumorchid2", "coral2", "blueviolet",
                   "indianred4", "deepskyblue1", "dimgray", "deeppink1",
                   "green", "lightgray", "hotpink1")


# Source functions
source(here::here("bin/utils.R"))


# Thresholds

2.3 Load data

seurat <- readRDS(path_to_obj)

3 Subset and reprocess

Of note, everytime that we filter out a specific population, the set of highly variable genes (HVG) and the axis of variability (PCs) changes. Thus, we need to rerun the main analysis steps iteratively.

First, we focus on the patient studied in this notebook:

seurat <- subset(seurat, donor_id == "365")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "subproject")

As we can see, there is a large batch effect between the two experiments we conducted BCLLATLAS_10 and BCLLATLAS_29. However, contrary to the previous cases, here BCLLATLAS_29 contains all time-points and has a better quality than BCLLATLAS_10. Thus, in this case we will focus first on BCLLATLAS_29 (not hashed) and later in BCLLATLAS_10 (hashed).

seurat <- subset(seurat, subproject == "BCLLATLAS_29")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "subproject")

FeaturePlot(seurat, "pct_mt") +
  scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, "nFeature_RNA") +
  scale_color_viridis_c(option = "magma")

We see a subpopulation with characteristics of low-quality cells: high mitochondrial expression and low number of detected features (genes). Hence, let us exclude it from the dataset:

seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5134
## Number of edges: 172844
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8371
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(seurat)

seurat <- subset(seurat, seurat_clusters != "2")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat)

DimPlot(seurat, group.by = "time_point")

4 Cluster

seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.65)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4691
## Number of edges: 158278
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7324
## Number of communities: 8
## Elapsed time: 0 seconds
DimPlot(seurat)

Notably, we observe how we can stratify cluster 4 based on MIR155HG and ENO1 expression:

FeaturePlot(seurat, "MIR155HG") + scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, "ENO1") + scale_color_viridis_c(option = "magma")

seurat <- FindSubCluster(
  seurat,
  cluster = "4",
  graph.name = "RNA_snn",
  subcluster.name = "CCND2_subclusters",
  resolution = 0.2
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 397
## Number of edges: 15482
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8018
## Number of communities: 2
## Elapsed time: 0 seconds
seurat$final_clusters <- seurat$CCND2_subclusters
Idents(seurat) <- "final_clusters"
DimPlot(seurat)

5 Save

saveRDS(seurat, path_to_save)

6 Session Info

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1        stringr_1.4.0        dplyr_1.0.6          purrr_0.3.4          readr_1.4.0          tidyr_1.1.3          tibble_3.1.2         ggplot2_3.3.3        tidyverse_1.3.1      harmony_1.0          Rcpp_1.0.6           SeuratWrappers_0.3.0 SeuratObject_4.0.2   Seurat_4.0.3         BiocStyle_2.18.1    
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.2.1       plyr_1.8.6            igraph_1.2.6          lazyeval_0.2.2        splines_4.0.4         listenv_0.8.0         scattermore_0.7       digest_0.6.27         htmltools_0.5.1.1     fansi_0.5.0           magrittr_2.0.1        tensor_1.5            cluster_2.1.1         ROCR_1.0-11           remotes_2.4.0         globals_0.14.0        modelr_0.1.8          matrixStats_0.59.0    spatstat.sparse_2.0-0 colorspace_2.0-1      rvest_1.0.0           ggrepel_0.9.1         haven_2.4.1           xfun_0.23             crayon_1.4.1          jsonlite_1.7.2        spatstat.data_2.1-0   survival_3.2-10       zoo_1.8-9             glue_1.4.2            polyclip_1.10-0       gtable_0.3.0          leiden_0.3.8          future.apply_1.7.0    abind_1.4-5           scales_1.1.1          DBI_1.1.1             miniUI_0.1.1.1        viridisLite_0.4.0     xtable_1.8-4          reticulate_1.20       spatstat.core_2.1-2   rsvd_1.0.5            htmlwidgets_1.5.3     httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2             farver_2.1.0          pkgconfig_2.0.3       sass_0.4.0            uwot_0.1.10           dbplyr_2.1.1         
##  [55] deldir_0.2-10         here_1.0.1            utf8_1.2.1            labeling_0.4.2        tidyselect_1.1.1      rlang_0.4.11          reshape2_1.4.4        later_1.2.0           munsell_0.5.0         cellranger_1.1.0      tools_4.0.4           cli_2.5.0             generics_0.1.0        broom_0.7.7           ggridges_0.5.3        evaluate_0.14         fastmap_1.1.0         yaml_2.2.1            goftest_1.2-2         knitr_1.33            fs_1.5.0              fitdistrplus_1.1-5    RANN_2.6.1            pbapply_1.4-3         future_1.21.0         nlme_3.1-152          mime_0.10             xml2_1.3.2            compiler_4.0.4        rstudioapi_0.13       plotly_4.9.4          png_0.1-7             spatstat.utils_2.2-0  reprex_2.0.0          bslib_0.2.5.1         stringi_1.6.2         highr_0.9             RSpectra_0.16-0       lattice_0.20-41       Matrix_1.3-4          vctrs_0.3.8           pillar_1.6.1          lifecycle_1.0.0       BiocManager_1.30.15   spatstat.geom_2.1-0   lmtest_0.9-38         jquerylib_0.1.4       RcppAnnoy_0.0.18      data.table_1.14.0     cowplot_1.1.1         irlba_2.3.3           httpuv_1.6.1          patchwork_1.1.1       R6_2.5.0             
## [109] bookdown_0.22         promises_1.2.0.1      KernSmooth_2.23-18    gridExtra_2.3         parallelly_1.26.0     codetools_0.2-18      MASS_7.3-53.1         assertthat_0.2.1      rprojroot_2.0.2       withr_2.4.2           sctransform_0.3.2     mgcv_1.8-36           parallel_4.0.4        hms_1.1.0             grid_4.0.4            rpart_4.1-15          rmarkdown_2.8         Rtsne_0.15            shiny_1.6.0           lubridate_1.7.10